import os

from project_setting import KEYWORDS_EXTEND_DATA_PATH, NO_NEED_CHECK_URL_DIR
def get_no_needcheck_urls(province, source_type):
    """Return the URLs that do not need to be extracted for a source.

    The URLs are read from the UTF-8 text file located at
    ``NO_NEED_CHECK_URL_DIR/<province>/<source_type>``, one URL per line.

    Args:
        province: Name of the sub-directory under ``NO_NEED_CHECK_URL_DIR``.
        source_type: Name of the file inside the province directory.

    Returns:
        A list of unique, whitespace-stripped URLs in the order they first
        appear in the file. Blank lines are ignored. The list is empty when
        no regular file exists at the computed path.
    """
    file_path = os.path.join(NO_NEED_CHECK_URL_DIR, province, source_type)
    # isfile() rather than exists(): a directory at this path would pass an
    # exists() check and then make open() raise.
    if not os.path.isfile(file_path):
        return []
    with open(file_path, 'r', encoding='utf-8') as fread:
        stripped_lines = (line.strip() for line in fread)
        # dict.fromkeys() de-duplicates while keeping file order, so the
        # result is deterministic; blank lines are dropped so that an empty
        # string never shows up as a "URL".
        return list(dict.fromkeys(url for url in stripped_lines if url))


def get_extend_keywords(province, source_type):
    """Load the extra keyword entries handled separately for a site.

    The entries are read from the UTF-8 text file located at
    ``KEYWORDS_EXTEND_DATA_PATH/<province>/<source_type>``. Each non-blank
    line is one record whose columns are separated by tabs and/or spaces.
    The documented column layout is (example rows kept verbatim)::

        mode  type    keyword
        +   common_电话    咨询电话
        -   ingore  咨询电话

    NOTE(review): ``mode`` presumably marks a keyword as added (``+``) or
    removed (``-``); this function does not interpret it -- confirm against
    the callers.

    Args:
        province: Name of the sub-directory under
            ``KEYWORDS_EXTEND_DATA_PATH``.
        source_type: Name of the file inside the province directory.

    Returns:
        A list with one entry per non-blank line, in file order. Each entry
        is the list of column strings found on that line. The list is empty
        when no regular file exists at the computed path.
    """
    add_keys = []
    file_path = os.path.join(KEYWORDS_EXTEND_DATA_PATH, province, source_type)
    # isfile() rather than exists(): a directory at this path would pass an
    # exists() check and then make open() raise.
    if not os.path.isfile(file_path):
        return add_keys
    with open(file_path, 'r', encoding='utf-8') as fread:
        for line in fread:
            # Treat tabs and runs of spaces as a single separator: tabs are
            # normalised to spaces, and the empty strings that split(" ")
            # yields between consecutive separators are discarded.
            fields = [
                field
                for field in line.strip().replace("\t", " ").split(" ")
                if field
            ]
            # A blank line yields no fields and is not a record.
            if fields:
                add_keys.append(fields)
    return add_keys